In [1]:
import pandas as pd
from pandas import DataFrame as DF, Series
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
When creating a graph object, it can either be empty (default) or you can pass data as an argument. The data can take multiple forms:
Let's start by creating an empty graph, and then add nodes.
In [2]:
# isntantiate a graph object
G = nx.Graph()
# add a single node
G.add_node(1)
# add multiple nodes from a list
G.add_nodes_from([2,3,5])
# return lists of nodes and edges in the graph
G.nodes(), G.edges()
Out[2]:
Notice that the edge list is empty, since we haven't added any edges yet. Also, because the number of edges in a graph can become very large, there is an iterator method for returning edges: edges_iter()
.
In [3]:
# add a single edge between 3 and 5
G.add_edge(3,5)
# add multiple edges using list of tuples
edge_list = [(1,2),(2,3),(2,5)]
G.add_edges_from(edge_list)
G.edges()
Out[3]:
If we had a script that needs to add a single edge from a tuple, we would use *
preceeding the tuple or it's assigned variable in the add_edge
method:
In [4]:
# the asterisk indicates that the values should be extracted
G.add_edge(*(1,3))
G.edges()
Out[4]:
We can also add nodes and edges using objects called nbunch and ebunch. These objects are any iterables or generators of nodes or edge tuples. We will do this below using a couple different methods.
In [5]:
# generate a graph of linearly connected nodes
# this is a graph of a single path with 5 nodes and 4 edges
H = nx.path_graph(5)
# a look at the nodes and edges produced
H.nodes(), H.edges()
Out[5]:
In [6]:
# Create a graph using nbunch and ebunch from the graph H
G = nx.Graph()
G.add_nodes_from(H)
# we have to specify edges
G.add_edges_from(H.edges())
G.nodes(), G.edges()
Out[6]:
In [7]:
# now add edges to a graph using an iterator instead of an iterable list
# this is another example of ebunch, and node iterators work too
G = nx.Graph()
G.add_nodes_from([1,2,3])
# create edge generator connecting all possible node pairs
from itertools import combinations
edge_generator = combinations([1,2,3], 2)
# to show this is a generator and not a list
print('not a list: ', edge_generator)
# now lets add the edges using the iterator
G.add_edges_from(edge_generator)
G.edges()
Out[7]:
We can also remove nodes or edges using similar methods, just replacing 'add' with 'remove':
In [8]:
H.nodes(), H.edges()
Out[8]:
In [9]:
H.remove_nodes_from([0,4])
H.nodes(), H.edges()
Out[9]:
Notice that removing nodes automatically removed the related edges for us. One last basic inspection method is to get a list of neighbors (adjacent nodes) for a specific node in a graph.
In [10]:
H = nx.path_graph(7)
# get the neighbors for node 5
H.neighbors(5)
Out[10]:
While we have been using numbers to represent nodes, we can use any hashable object as a node. For example, this means that lists, sets and arrays can't be nodes, but frozensets can:
In [11]:
G = nx.Graph()
# G.add_node([0,1]) <-- raises error
# G.add_node({0,1}) <-- raises error
G.add_node(frozenset([0,1])) # this works
G.nodes()
Out[11]:
We can add weights or other properties to edges in a graph in different ways. The first is to add properties at creation time by passing triples instead of doubles for each edge. The third value will be the edge property.
In [12]:
G = nx.Graph()
G.add_nodes_from([1,2,3])
G.add_weighted_edges_from([(1,2,3.14), (2,3,6.5)])
# calling edges() alone will not return weights
print(G.edges(), '\n')
# we need to use the data parameter to get triples
print(G.edges(data='weight'), '\n')
# we can also get data for individual edges
print(G.get_edge_data(1,2))
We can use subscript notiation on a graph object to easily get edge data. Access edge data for a node by entering that node as the subscript. This will return a dict with connected nodes as keys, and their respective edge weights as values.
In [13]:
# get edge data for node 2
print(G[2], '\n')
# subscript further to get only the weight for edge between 2 and 3
print(G[2][3])
We can also modify specific edge attributes:
In [14]:
G[2][3]['weight'] = 17
G[2][3]
Out[14]:
And we can add other attributes
In [15]:
G[2][3]['attr'] = 'value'
G[2][3]
Out[15]:
Remember, a complete graph is an undirected graph that has every pair of nodes connected by a unique edge. In other words, every node is adjacent to every other.
In [16]:
# manually
from itertools import combinations
complete_edges = combinations(range(7), 2)
G_complete = nx.Graph(complete_edges)
G_complete.edges()
Out[16]:
In [17]:
# built-in method
G_complete = nx.complete_graph(7)
G_complete.edges()
Out[17]:
In [6]:
# function to draw and label nodes in a graph
def draw(G, layout):
import warnings
import matplotlib.cbook
warnings.filterwarnings("ignore",category=matplotlib.cbook.mplDeprecation)
warnings.filterwarnings("ignore",category=UserWarning)
nx.draw(G, pos=layout(G))
nx.draw_networkx_labels(G, pos=layout(G));
In [19]:
draw(G_complete, nx.circular_layout)
Recall that a component is a connected subgraph.
In [20]:
from networkx.drawing.nx_agraph import graphviz_layout
edges_1 = [(0,1), (0,2)]
edges_2 = [(3,4), (3,5), (4,5), (4,6)]
edges_3 = [(7,8), (7,9)]
G = nx.Graph(edges_1 + edges_2 + edges_3)
draw(G, graphviz_layout)
In [21]:
# create in and out edge lists
out_edges = [(0,1), (0,2), (1,2), (2,3)]
# create the empty digraph
G = nx.DiGraph(out_edges)
draw(G, graphviz_layout)
In [22]:
out_edges = [(0,1,0.5), (0,2,0.5), (1,2,1), (2,3,0.7)]
G = nx.DiGraph()
G.add_weighted_edges_from(out_edges)
draw(G, graphviz_layout)
labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos=graphviz_layout(G), edge_labels=labels);
In [3]:
import pickle
with open('../edges_1.pkl', 'rb') as f:
edges = pickle.load(f)
In [24]:
G = nx.Graph(edges)
# lets see what it looks like
draw(G, graphviz_layout)
In [25]:
adj_matrix = nx.to_numpy_matrix(G)
DF(adj_matrix)
Out[25]:
In [26]:
geodesics = nx.all_pairs_shortest_path_length(G)
# this gave us a dict of shortest path lengths between all connected pairs
geodesics
Out[26]:
In [27]:
# we can easily convert this to a matrix using pandas
DF(geodesics)
Out[27]:
In [28]:
# 4 is a cutpoint
G.remove_node(4)
draw(G, graphviz_layout)
In [29]:
# G = nx.moebius_kantor_graph()
G = nx.dorogovtsev_goltsev_mendes_graph(3)
In [7]:
with open('../edges_2.pkl', 'rb') as f:
edges = pickle.load(f)
G = nx.Graph(edges)
# draw(G, graphviz_layout)
draw(G, nx.circular_layout)
In [31]:
list(nx.find_cliques(G))
Out[31]:
In [32]:
degree = nx.degree_centrality(G)
closeness = nx.closeness_centrality(G)
betweenness = nx.betweenness_centrality(G)
In [33]:
Series(degree)
Out[33]:
In [34]:
Series(closeness)
Out[34]:
In [35]:
Series(betweenness)
Out[35]:
When considering control over flow of information, we look at the betweenness measure. In this case, actors 0, 1, and 2 have the greatest control over information flow. The actors with betweenness measures of zero have no geodesics through them connecting other pairs of nodes. For example, there is a path from 0 to 4 that goes through 9, but that has a length of 2 and 0 connects to 4 directly.
In [8]:
H = G.copy()
H.remove_node(0)
H.add_edge(10,14)
H.remove_edge(1,2)
# draw(H, graphviz_layout)
draw(H, nx.circular_layout)
In [9]:
# eccentricity of node 1
nx.eccentricity(H, 1)
Out[9]:
In [11]:
# find cliques containing node 1
nx.cliques_containing_node(H, 1)
Out[11]:
In [39]:
# density
nx.density(H)
Out[39]:
In [40]:
# remove node 1
H.remove_node(1)
nx.density(H)
Out[40]:
In [41]:
with open('edges_3.pkl', 'rb') as f:
edges = pickle.load(f)
G = nx.DiGraph(edges)
draw(G, graphviz_layout)
In [42]:
# adjacency matrix
adj_matrix = nx.to_numpy_matrix(G)
DF(adj_matrix)
Out[42]:
In [43]:
# indegree and outdegree
indegree = adj_matrix.sum(axis=0) / (len(adj_matrix)-1)
outdegree = adj_matrix.sum(axis=1) / (len(adj_matrix)-1)
in_method = nx.in_degree_centrality(G)
out_method = nx.out_degree_centrality(G)
In [44]:
# indegree comparison
(Series(np.array(indegree).flatten()) == Series(in_method)).all()
Out[44]:
In [45]:
# outdegree comparison
(Series(np.array(outdegree).flatten()) == Series(out_method)).all()
Out[45]:
In [46]:
Series(in_method).sort_values(ascending=False)
Out[46]:
Node 0 has the greatest in-degree centrality, which is obvious when looking at the graph.